****************************************************************************
****************************************************************************
** 							   PAKISTAN PREPARE					      	  **
*						CENTER FOR GLOBAL DEVELOPMENT   				   *
*							    EDUCATION TEAM							   *
***************************      March - 2022	   	************************
****************************************************************************
*																		   *
*								Cleaning DO-FILE						   *
*																		   *
****************************************************************************

* List of abbreviations:
* hoh: head of household
* hh: household
* oc: oldest child
* yc: youngest child
* ch: child
* ad: adult
* mar: married
* pc: pre-covid
* sch: school
* sc: school closures
* cv: covid
* op: opinion
* vl: violence
* wld: would
* vacc: vaccinate
* pp: policy preference
* fem: female
* num: number
* pw: per week

* Load the raw household survey; -clear- discards whatever is in memory.
use "20220404 CGDEV_HH_Survey.dta", clear

** PART 1) RENAME VARIABLES

	** Primary information
	* One rename per line so each raw questionnaire code sits next to its new name.
rename ls04     education_level
rename ls05     owner_of_phone
rename ls06     head_of_household
rename ls06_a   hoh_fem
rename ls07     hh_size
rename survey_1 num_sch_age_ch
rename ls09     occupation
rename ls09_o   occupation_other
rename ls10     oc_relation
rename ls10_o   oc_relation_other
rename ls10_a   fath_education
rename ls10_b   moth_education
rename ls11     marital_status
rename ls11_o   marital_status_other
rename survey_2 gender
rename ls12     women_work

	** Household Characteristics & asset wealth
	* Raw a01-a10 codes -> descriptive asset / sanitation / water names.
rename a01   fridge
rename a02   radio
rename a03   tv
rename a04   motorbike
rename a05   car
rename a06   stove
rename a07   internet
rename a08   toilet
rename a08_o toilet_other
rename a09   toilet_share
rename a10   drink_water_source
rename a10_o drink_water_source_other

	** Children's characteristics & education				
	* q_codes: suffixes of the child-level question codes (36 entries).
	* NOTE(review): these look like the suffixes of the *_ch0<code> variables
	* reshaped later in this file -- confirm.
local q_codes 1 2 3 4 4_a 4_a1 4_a2 4_b 4_b1 5 5_a 6 6_a 6_b 7 7_a 7_b 7_c 7_c_label 7_c_1 7_c_2 7_c_3 7_c_4 7_c_5 7_c_6 7_c_7 7_c_8 7_d 8 8_b 8_a 8_a_o 9 9_a 9_b 9_b1

	* newnames: descriptive names (36 entries), apparently positionally parallel
	* to q_codes.
	* NOTE(review): neither local is referenced anywhere in the visible part of
	* this do-file; presumably they were meant for a rename loop over the
	* child-level variables -- confirm, then wire them up or remove them.
local newnames name disabled female age married mar_year mar_month mar_nxt_12_mnths mar_nxt_12_mnths_month been_pregnant pregnant_age pc_enrolled pc_sch_type pc_grade enrolled sch_type sch_same sch_reason sch_reason_label sch_reason_teaching sch_reason_cheaper sch_reason_near sch_reason_fem_teachers sch_reason_male_teachers sch_reason_teacher_behave sch_reason_environment sch_reason_only grade sch_returned sch_num_days_pw sch_nt_return_reason sch_nt_return_reason_other sch_will_return sch_wont_return_reason sch_non_formal sch_non_formal_rating

	** Parents' perceptions of hybrid learning/education
	* School-closure (sc_) questions: overall effect and TV / radio / online use and ratings.
rename ed01   sc_negative
rename ed02   sc_tv
rename ed02_a sc_tv_rating
rename ed03   sc_radio
rename ed03_a sc_radio_rating
rename ed04   sc_online
rename ed04_a sc_online_rating

	* Other learning support for boys (ed05 and its per-option indicators).
	* ed05_5 deliberately keeps the name sc_close_other_boys_parents here: a
	* later rename in this file expects that exact name and corrects it.
rename ed05       sc_other_boys
rename ed05_label sc_other_boys_label
rename ed05_1     sc_other_boys_tutor
rename ed05_2     sc_other_boys_online_paid
rename ed05_3     sc_other_boys_books
rename ed05_4     sc_other_boys_internet
rename ed05_5     sc_close_other_boys_parents
rename ed05_6     sc_other_boys_siblings
rename ed05_7     sc_other_boys_none
rename ed05_8     sc_other_boys_na

	* Same set of options for girls (ed06).
rename ed06       sc_other_girls
rename ed06_label sc_other_girls_label
rename ed06_1     sc_other_girls_tutor
rename ed06_2     sc_other_girls_online_paid
rename ed06_3     sc_other_girls_books
rename ed06_4     sc_other_girls_internet
rename ed06_5     sc_other_girls_parents
rename ed06_6     sc_other_girls_siblings
rename ed06_7     sc_other_girls_none
rename ed06_8     sc_other_girls_na
	
	** Access to health care during the pandemic
	* Who decides on health matters in the household.
rename h01   hh_health_decider
rename h01_o hh_health_decider_other

	* COVID testing: respondent (h02) and household members (h03 + indicators).
rename h02       tested
rename h03       tested_hh
rename h03_label tested_hh_label
rename h03_1     tested_none
rename h03_2     tested_wife
rename h03_3     tested_husband
rename h03_4     tested_male_ch
rename h03_5     tested_fem_ch
rename h03_6     tested_fem_ad
rename h03_7     tested_male_ad
rename h03_8     tested_all

	* Willingness to vaccinate and reasons for not vaccinating.
rename h04         wld_vacc
rename h04_a       wld_not_vacc_reason
rename h04_a_label wld_not_vacc_reason_label
rename h04_a_1     wld_not_vacc_vaccinated
rename h04_a_2     wld_not_vacc_side_effects
rename h04_a_3     wld_not_vacc_safety
rename h04_a_4     wld_not_vacc_ineffective
rename h04_a_5     wld_not_vacc_women_health
rename h04_a_6     wld_not_vacc_fertility

	* Which household members would be allowed to vaccinate.
	* h05_2 deliberately keeps the name allow_vaccine_wife here: a later rename
	* in this file expects that exact name and corrects it.
rename h05       allow_vacc
rename h05_label allow_vacc_label
rename h05_1     allow_vacc_none
rename h05_2     allow_vaccine_wife
rename h05_3     allow_vacc_husband
rename h05_4     allow_vacc_male_ch
rename h05_5     allow_vacc_fem_ch
rename h05_6     allow_vacc_fem_ad
rename h05_7     allow_vacc_male_ad
rename h05_8     allow_vacc_all

	** Economic shocks during COVID-19
rename sh01 migrated_village
rename sh02 migrated_village_because_cv
rename sh03 lacked_food
rename sh04 lacked_food_cv
rename sh05 lacked_cash
rename sh06 lacked_cash_cv


				***** Child answers *****


	** ASER Children's test & effects of SNC
	* Permission to talk to the child(ren) and test logistics.
rename aser0001   allow_talk_ch
rename aser0002_o allow_talk_oc
rename aser0002_y allow_talk_yc
rename aser0003_o not_allow_reason_oc
rename aser0003_y not_allow_reason_yc
rename aser005    pencil_paper

	* Test items (aser01-aser05) and child opinion questions (snc*).
rename aser01 aser_1_correct
rename aser02 aser_2_correct
rename aser03 aser_3_correct
rename aser04 aser_4_correct
rename aser05 aser_5_correct
rename snc01  op_ch_men_leaders
rename snc02  op_ch_mother_work
rename snc04  op_ch_pakistan_better
rename snc05  op_ch_pakistan_proud


				*****  Adult again  *****


	** Gender based violence: Vignette
rename gb00 ch_alone
rename gb01 vl_change_women
rename gb02 vl_change_young
rename gb10 vl_teacher_boys
rename gb11 vl_teacher_girls
rename gb12 vl_teacher_op

	** Policy Preferences & gender attitudes
rename pp03   pp_education_budget
rename pp02   pp_girls_scholarships
rename ga01   op_ad_mother_work
rename ga02   op_ad_women_work
rename ga03_a op_ad_women_work_num_men
rename ga03_b op_ad_women_work_num_women
rename ga04   legal_age_mar_girl

* Display the notes stored with the dataset.
note list

	* female copies gender when gender is 0 or 1 and is missing for any other value.
	generate female = gender if inlist(gender, 0, 1)

	* Bring two names assigned earlier in this file in line with their sibling variables.
	rename (sc_close_other_boys_parents allow_vaccine_wife) ///
	       (sc_other_boys_parents       allow_vacc_wife)
	
	** Create asset index
	* Built on every observation currently in memory (the response filters
	* come later in this file).

	* One indicator variable per category: toilet1, toilet2, ... / water1, water2, ...
	qui tab toilet, gen(toilet)
	qui tab drink_water_source, gen(water)

	* Principal components of the asset indicators plus the water-source dummies.
	* NOTE(review): "water?" matches only names with exactly one character after
	* "water", so it assumes at most nine water-source categories; the toilet
	* dummies created above are not used here -- confirm both are intended.
	pca fridge radio tv motorbike car stove internet water?

	* Score on the first principal component, then standardised (mean 0, sd 1).
	predict assetunstandard
	egen assetindex=std(assetunstandard)

	* Split the index into four quantile groups and create one indicator per
	* group (q1-q4).
	xtile q = assetindex, n(4)
	qui tab q, gen(q)

	* n(4) produces quartiles, not quintiles, so the labels say "Quartile".
	lab var q1 "Quartile 1 (Poorest)"
	lab var q4 "Quartile 4 (Richest)"

	** Keep only valid responses
	* A row survives only if both response flags equal 1.
	keep if phone_response==1 & answered_response==1
	
	** Post-stratification
	* Strata are the observed combinations of province x internet x water1,
	* numbered 1, 2, ... by -egen group()-. Observations with a missing value
	* in any of the three variables get a missing type.
	egen type = group(province internet water1)

	* Hard-coded weight for each of the 16 strata.
	* NOTE(review): the mapping relies on group() producing exactly these 16
	* cells in this order -- re-check if the input data change.
	gen weight = 3.63075028 if type == 1
	replace weight = 4.310519813 if type == 2
	replace weight = 3.729567891 if type == 3
	replace weight = 6.736173391 if type == 4
	replace weight = 0.751810758 if type == 5
	replace weight = 1.38142579 if type == 6
	replace weight = 1.307087488 if type == 7
	replace weight = 3.675776047 if type == 8
	replace weight = 0.912079405 if type == 9
	replace weight = 0.39283832 if type == 10
	replace weight = 1.702315316 if type == 11
	replace weight = 1.327548571 if type == 12
	replace weight = 0.860626372 if type == 13
	replace weight = 0.563976857 if type == 14
	replace weight = 0.777847922 if type == 15
	replace weight = 1.595498712 if type == 16

	* Cap weights at 4. Stata treats missing as larger than any number, so a
	* bare "weight > 4" would also be true for observations without a stratum
	* and would silently hand them the capped weight; exclude missing explicitly
	* so those observations keep a missing weight.
	replace weight = 4 if weight > 4 & !missing(weight)

	* Rescale by the (hard-coded) mean weight.
	* NOTE(review): 1.084865 was presumably computed from an earlier run --
	* confirm it still matches -summarize weight- on the current data.
	replace weight = weight / 1.084865 // mean of weight
	
	** Labels
	* Value labels for the wealth-group variable q (1 = poorest, 4 = richest).
	label define lab ///
		1 "1 - Poorest" ///
		2 "2" ///
		3 "3" ///
		4 "4 - Richest"
	label values q lab
	
	** converting from proportions to percentages
	* Every listed variable is multiplied by 100 in place.
	* NOTE(review): tested_wife is not in the list although allow_vacc_wife and
	* all other tested_* indicators are -- confirm whether that is intentional.
	foreach pctvar of varlist ///
		sc_tv sc_radio sc_online tested wld_vacc ///
		tested_none tested_husband tested_male_ch tested_fem_ch ///
		tested_fem_ad tested_male_ad tested_all ///
		allow_vacc_none allow_vacc_wife allow_vacc_husband ///
		allow_vacc_male_ch allow_vacc_fem_ch allow_vacc_male_ad ///
		allow_vacc_fem_ad allow_vacc_all ///
		wld_not_vacc_vaccinated wld_not_vacc_side_effects ///
		wld_not_vacc_safety wld_not_vacc_ineffective ///
		wld_not_vacc_women_health wld_not_vacc_fertility ///
		sc_other_boys_none sc_other_boys_tutor sc_other_boys_parents ///
		sc_other_boys_siblings sc_other_boys_internet ///
		sc_other_boys_online_paid sc_other_boys_books sc_other_boys_na ///
		sc_other_girls_none sc_other_girls_tutor sc_other_girls_parents ///
		sc_other_girls_siblings sc_other_girls_internet ///
		sc_other_girls_online_paid sc_other_girls_books sc_other_girls_na {
		replace `pctvar' = `pctvar'*100
	}
		
	* Exclude the single household with case id "16701" (reason not recorded here).
	keep if caseid_e != "16701"

	** Saving wide version for household analysis
	* One row per household; overwrites any existing file of the same name.
	save "PakPrepAnalysis_wide.dta", replace
	
	** descriptives
	* Column-percentage cross-tabulations against survey_version.
	* Author's notes on the output (survey version 1 compared with the other):
	*   province        - somewhat more Balochistan in 1, but too much Punjab
	*   education_level - 1 is less educated
	*   hoh_fem         - 1 slightly more female hoh_fem
	*   hh_size         - roughly equivalent
	*   fath_education  - 1 has less educated fathers
	*   moth_education  - and less educated mothers
	*   marital_status  - 1 slightly more married
	*   gender          - similar
	*   women_work      - similar
	*   q               - 1 more weighted to 2, less to 4
	*   internet        - 1 has less internet
	*   water1          - 1 has more piped water
	foreach rowvar of varlist province education_level hoh_fem hh_size ///
		fath_education moth_education marital_status gender women_work ///
		q internet water1 {
		tab `rowvar' survey_version, col
	}

	* One-way tabulations of the pooled sample. Author's notes:
	*   internet - 20.5%, even further from PSLM
	*   female   - 16.7%, similar to before
	*   hh_size  - 8.15, compared to 8.1 in main sample, slightly further from PSLM
	*   water1   - similar to before
	foreach onevar of varlist internet female hh_size water1 {
		tab `onevar'
	}
	
	
	********** Cleaning data for child-level analysis ************

	// Keep survey version 2 only (the original note records 849 survey-1 rows removed)
	drop if survey_version != 2

	// Drop variables that are missing for every remaining observation.
	// -missings- is a community-contributed command and must be installed;
	// -force- permits the drop although the data changed since the last save.
	missings dropvars, force
	
	
	** Reshape to one row per household-child for child-level analysis
	* "@" marks where the child identifier sits in each wide variable name; the
	* identifier is stored as text in the new variable -child-.
	reshape long ///
		@_index @_ch01 @_ch04 @_ch02 @_ch03 ///
		@_ch04_a @_ch04_a1 @_ch04_a2 @_ch04_b @_ch04_b1 ///
		@_ch05 @_ch06 @_ch06_a @_ch06_b ///
		@_ch07 @_ch07_a @_ch07_b @_ch07_c @_ch07_c_label ///
		@_ch07_c_1 @_ch07_c_2 @_ch07_c_3 @_ch07_c_4 ///
		@_ch07_c_5 @_ch07_c_6 @_ch07_c_7 @_ch07_c_8 @_ch07_d ///
		@_ch08 @_ch08_b @_ch08_a @_ch08_a_o ///
		@_ch09 @_ch09_a @_ch09_b @_ch09_b1 ///
		checkpoint_2_, i(caseid_e) j(child) string

	** renaming
	* Descriptive names for the reshaped child-level variables used in analysis.
	rename (_ch04 _ch06 _ch07 _ch03 _ch02 _ch09 _ch06_b _ch07_d _ch08_b) ///
	       (age_post_covid enrolled_pre_covid enrolled_post_covid ///
	        child_gender disability will_return ///
	        grade_pre_covid grade_post_covid days_pw)

	** Saving long version for child-level analysis
	save "PakPrepAnalysis_long.dta", replace
	
	
	